package com.shujia.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object Demo6sample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("flatMap算子演示")
    val context = new SparkContext(conf)
    //====================================================
    val studentRDD: RDD[String] = context.textFile("spark/data/students.csv")

    /**
     *  sample算子：从前一个RDD的数据中抽样一部分数据
     *
     *  抽取的比例不是正好对应的，在抽取的比例上下浮动 比如1000条抽取10% 抽取的结果在100条左右
     */
    val sampleRDD: RDD[String] = studentRDD.sample(withReplacement = true, 0.1)
    sampleRDD.foreach(println)
  }

}
